import pandas as pd
import plotly.express as px
import numpy as np
# Load the insurance dataset from the working directory into a DataFrame.
df = pd.read_csv('insurance.csv')

# Preview the first five rows (5 is the default for head()).
df.head(n=5)
|  | age | sex | bmi | children | smoker | region | charges |
|---|---|---|---|---|---|---|---|
| 0 | 19 | female | 27.900 | 0 | yes | southwest | 16884.92400 |
| 1 | 18 | male | 33.770 | 1 | no | southeast | 1725.55230 |
| 2 | 28 | male | 33.000 | 3 | no | southeast | 4449.46200 |
| 3 | 33 | male | 22.705 | 0 | no | northwest | 21984.47061 |
| 4 | 32 | male | 28.880 | 0 | no | northwest | 3866.85520 |
# Count how many rows fall into each distinct value of the 'region' column.
df['region'].value_counts()
southeast    364
southwest    325
northwest    325
northeast    324
Name: region, dtype: int64
# Dimensions of the DataFrame as a (row count, column count) tuple.
df.shape
(1338, 7)
# Load the results table; judging by its column names it pairs each row's
# actual value with a predicted one (how it was produced is not shown here).
results = pd.read_csv('results.csv')

# Preview the first five rows (5 is the default for head()).
results.head(n=5)
|  | Age | Sex | Bmi | Children | Smoker | Region | ActualValue | PredictedValue |
|---|---|---|---|---|---|---|---|---|
| 0 | 32.0 | 1.0 | 28.88 | 0.0 | 1.0 | 1.0 | 3866.8552 | 5725.318890 |
| 1 | 25.0 | 1.0 | 26.22 | 0.0 | 1.0 | 0.5 | 2721.3208 | 2807.816971 |
| 2 | 23.0 | 1.0 | 34.40 | 0.0 | 1.0 | -0.5 | 1826.8430 | 4664.712803 |
| 3 | 56.0 | -1.0 | 39.82 | 0.0 | 1.0 | -1.0 | 11090.7178 | 15065.692717 |
| 4 | 19.0 | 1.0 | 24.60 | 1.0 | 1.0 | -0.5 | 1837.2370 | 741.034551 |
# Scatter the predicted values (y axis) against the actual values (x axis).
fig = px.scatter(results, x='ActualValue', y='PredictedValue')

# Overlay the identity line y = x over the observed ActualValue range; a point
# lying on this line has a prediction equal to its actual value.
x = np.linspace(
    results['ActualValue'].min(),
    results['ActualValue'].max(),
    num=1000,
)
fig.add_scatter(x=x, y=x, name='y=x')

fig.show()